
package com.jstarcraft.ai.jsat.text.wordweighting;

import java.util.List;

import com.jstarcraft.ai.jsat.linear.Vec;
import com.jstarcraft.ai.jsat.math.IndexFunction;

/**
 * WordWeighting is an index function specifically mean for modifying the values
 * of a vectors used for a bag-of-words representation of text data. <br>
 * Some Word weighting schemes may need information about the document
 * collection as a whole before constructing the weightings, and this class
 * provides the facilities for this to be done in a standardized manner.
 * 
 * @author Edward Raff
 */
public interface WordWeighting extends IndexFunction {

    /**
     * Prepares the word weighting to be performed on a data set. This should be
     * called once before being applied to any vectors. Different WordWeightings may
     * require different amounts of computation to set up.
     * 
     * @param allDocuments the list of all vectors that make up the set of
     *                     documents. The word vectors should be unmodified,
     *                     containing the value of how many times a word appeared in
     *                     the document for each index.
     * @param df           a list mapping each integer index of a word to how many
     *                     times that word occurred in total
     */
    public void setWeight(List<? extends Vec> allDocuments, List<Integer> df);

    /**
     * The implementation may want to pre compute come values based on the vector it
     * is about to be applied to. This should be called in place of
     * {@link Vec#applyIndexFunction(com.jstarcraft.ai.jsat.math.IndexFunction) } .
     * The vector should be in a bag-of-words form where each index value indicates
     * how many times the word for that index occurred in the document represented
     * by the vector.
     * 
     * @param vec the vector to set up for and then alter by invoking
     *            {@link Vec#applyIndexFunction(com.jstarcraft.ai.jsat.math.IndexFunction) }
     *            on
     */
    public void applyTo(Vec vec);
}
